﻿using System;
using System.Text.RegularExpressions;

/// <summary>
/// Exercise 25: extracts the title (if available) and the body text, without
/// HTML tags, from an HTML document and prints both to the console.
/// </summary>
class ExtractTextFromHTML
{
    // Content of the first <title> element. Singleline lets '.' span line breaks,
    // IgnoreCase accepts <TITLE> as well.
    private static readonly Regex TitlePattern =
        new Regex(@"<title\b[^>]*>(.*?)</title>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Content of the first <body> element.
    private static readonly Regex BodyPattern =
        new Regex(@"<body\b[^>]*>(.*?)</body>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Any single HTML tag, opening or closing.
    private static readonly Regex TagPattern = new Regex(@"<[^>]*>");

    // A run of whitespace (spaces, tabs, line breaks).
    private static readonly Regex WhitespacePattern = new Regex(@"\s+");

    /// <summary>
    /// Runs the extraction on a hard-coded sample document and writes a
    /// "Title: ..." line (only if a non-empty title exists) followed by a
    /// "Body: ..." line (only if non-empty body text exists).
    /// </summary>
    static void Main()
    {
        // Sample input, formatted for readability:
        //<html>
        //  <head><title>News</title></head>
        //  <body><p><a href="http://academy.telerik.com">Telerik
        //  Academy</a>aims to provide free real-world practical
        //  training for young people who want to turn into
        //  skillful .NET software engineers.</p></body>
        //</html>

        string allText = @"<html><head><title>News</title></head><body><p><a href=""http://academy.telerik.com"">Telerik
                            Academy</a>aims to provide free real-world practical
                            training for young people who want to turn into
                            skillful .NET software engineers.</p></body>
                            </html>";

        string title = ExtractTitle(allText);
        if (title.Length > 0)
        {
            Console.WriteLine("Title: " + title);
        }

        string body = ExtractBodyText(allText);
        if (body.Length > 0)
        {
            Console.WriteLine("Body: " + body);
        }
    }

    /// <summary>
    /// Returns the text of the first title element with tags removed and
    /// whitespace normalized.
    /// </summary>
    /// <param name="html">The HTML document; may be null.</param>
    /// <returns>The title text, or an empty string when there is no title element.</returns>
    private static string ExtractTitle(string html)
    {
        return ExtractElementText(TitlePattern, html);
    }

    /// <summary>
    /// Returns the text of the first body element with tags removed and
    /// whitespace normalized.
    /// </summary>
    /// <param name="html">The HTML document; may be null.</param>
    /// <returns>The body text, or an empty string when there is no body element.</returns>
    private static string ExtractBodyText(string html)
    {
        return ExtractElementText(BodyPattern, html);
    }

    // Applies an element pattern (whose group 1 is the element content) and
    // returns the cleaned-up content, or an empty string when nothing matches.
    private static string ExtractElementText(Regex elementPattern, string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        Match match = elementPattern.Match(html);
        if (!match.Success)
        {
            return string.Empty;
        }

        return StripTagsAndNormalize(match.Groups[1].Value);
    }

    // Removes tags and collapses whitespace. Tags are replaced with a space
    // rather than deleted so that text on either side of a tag
    // ("Academy</a>aims") does not get glued together; the whitespace collapse
    // and Trim then remove the surplus spaces, line breaks and indentation.
    private static string StripTagsAndNormalize(string fragment)
    {
        string withoutTags = TagPattern.Replace(fragment, " ");
        return WhitespacePattern.Replace(withoutTags, " ").Trim();
    }
}

